##### SUBGROUP PLOTS #####
library(tidyverse)
library(broom)
library(estimatr)
library(ggpubr)

# function to extract ATEs
Get_ATE <- function(data, subsample = c(0,1)) {
      out <- tidy(lm_robust(formula(data$FORMULA[1]), data, 
                            subset = TNRFU %in% subsample))
      out %>% slice_tail() %>%
            mutate(StudyID = data$StudyId[1])
}

# set working directory
setwd("data/analysis/cleaned")

#### GENDER ####
### MEN ###


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, GENDER == 1)
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

men_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(men_results, subsample == "eager")
reluctants <- filter(men_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p1 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("Men") +
      scale_color_manual(limits = c("both",  "neither", "only eager", "only reluctant"), 
                         values = c("#F8766D",  "#7CAE00", "#00BFC4", "#C77CFF")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))



### WOMEN ###
# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, GENDER == 2)
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

women_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(women_results, subsample == "eager")
reluctants <- filter(women_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p2 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("Women") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))




#### AGE (3 cat.) #####
## 18- to 39-year-olds ##
# set working directory


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, AGE3 = case_when(
            between(AGE, 18, 39) ~ "18-39",
            between(AGE, 40, 59) ~ "40-59",
            between(AGE, 60, 150) ~ "60+"
      ))
      d <- filter(d, AGE3 == "18-39")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

age1_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(age1_results, subsample == "eager")
reluctants <- filter(age1_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p3 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("18-39") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))



## 40- to 59-year-olds ##
# set working directory


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, AGE3 = case_when(
            between(AGE, 18, 39) ~ "18-39",
            between(AGE, 40, 59) ~ "40-59",
            between(AGE, 60, 150) ~ "60+"
      ))
      d <- filter(d, AGE3 == "40-59")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

age2_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(age2_results, subsample == "eager")
reluctants <- filter(age2_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p4 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("40-59") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))

## 60+-year-olds ##
# set working directory


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, AGE3 = case_when(
            between(AGE, 18, 39) ~ "18-39",
            between(AGE, 40, 59) ~ "40-59",
            between(AGE, 60, 150) ~ "60+"
      ))
      d <- filter(d, AGE3 == "60+")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

age3_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(age3_results, subsample == "eager")
reluctants <- filter(age3_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p5 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("60+") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))

#### Party ID (3 cat.) ####
### Democrats ###


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, pid3 = case_when(
            between(PartyID7, 1, 3) ~ "Democrat",
            between(PartyID7, 4, 4) ~ "Independent",
            between(PartyID7, 5, 7) ~ "Republican",
            TRUE ~ NA_character_))
      d <- filter(d, pid3 == "Democrat")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

dem_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(dem_results, subsample == "eager")
reluctants <- filter(dem_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p6 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("Democrats") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))

### Independents ###

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, pid3 = case_when(
            between(PartyID7, 1, 3) ~ "Democrat",
            between(PartyID7, 4, 4) ~ "Independent",
            between(PartyID7, 5, 7) ~ "Republican",
            TRUE ~ NA_character_))
      d <- filter(d, pid3 == "Independent")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

ind_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(ind_results, subsample == "eager")
reluctants <- filter(ind_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p7 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("Independents") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))


### Republicans ###


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, pid3 = case_when(
            between(PartyID7, 1, 3) ~ "Democrat",
            between(PartyID7, 4, 4) ~ "Independent",
            between(PartyID7, 5, 7) ~ "Republican",
            TRUE ~ NA_character_))
      d <- filter(d, pid3 == "Republican")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

rep_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(rep_results, subsample == "eager")
reluctants <- filter(rep_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p8 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("Republicans") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))

#### EDUCATION (3 cat.) ####

### HS Degree or less ###


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, EDUC4 = case_when(
            EDUC < 9 ~ "1No HS degree",
            EDUC == 9 ~ "2HS degree", 
            between(EDUC, 10, 11) ~ "3Some college",
            EDUC > 11 ~ "4BA or higher"))
      
      d <- filter(d, EDUC4 == "1No HS degree" | EDUC4 == "2HS degree")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

nohs_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(nohs_results, subsample == "eager")
reluctants <- filter(nohs_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p9 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("HS Degree or Less") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))


### Some college ###


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, EDUC4 = case_when(
            EDUC < 9 ~ "1No HS degree",
            EDUC == 9 ~ "2HS degree", 
            between(EDUC, 10, 11) ~ "3Some college",
            EDUC > 11 ~ "4BA or higher"))
      
      d <- filter(d, EDUC4 == "3Some college")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

somecoll_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(somecoll_results, subsample == "eager")
reluctants <- filter(somecoll_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p10 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("Some College") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))

### BA or higher ###


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, EDUC4 = case_when(
            EDUC < 9 ~ "1No HS degree",
            EDUC == 9 ~ "2HS degree", 
            between(EDUC, 10, 11) ~ "3Some college",
            EDUC > 11 ~ "4BA or higher"))
      
      d <- filter(d, EDUC4 == "4BA or higher")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

bs_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(bs_results, subsample == "eager")
reluctants <- filter(bs_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p11 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("BA or Higher") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))


#### INCOME (3 cat.) #### 

## Lower 1/3 of Income ##
# set working directory


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, INCOME3 = case_when(
            between(INCOME, 1, 8) ~ "Low Income",
            between(INCOME, 9, 12) ~ "Middle Income",
            between(INCOME, 13, 18) ~ "High Income"
      ))
      d <- filter(d, INCOME3 == "Low Income")
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

inc1_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(inc1_results, subsample == "eager")
reluctants <- filter(inc1_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p12 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("Income: Lower 1/3") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))


## Middle 1/3 of Income ##
# set working directory


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, INCOME3 = case_when(
            between(INCOME, 1, 8) ~ "Low Income",
            between(INCOME, 9, 12) ~ "Middle Income",
            between(INCOME, 13, 18) ~ "High Income"
      ))
      d <- filter(d, INCOME3 == "Middle Income")
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

inc2_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(inc2_results, subsample == "eager")
reluctants <- filter(inc2_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p13 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("Income: Middle 1/3") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))

## Upper 1/3 of Income ##
# set working directory


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, INCOME3 = case_when(
            between(INCOME, 1, 8) ~ "Low Income",
            between(INCOME, 9, 12) ~ "Middle Income",
            between(INCOME, 13, 18) ~ "High Income"
      ))
      d <- filter(d, INCOME3 == "High Income")
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

inc3_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(inc3_results, subsample == "eager")
reluctants <- filter(inc3_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p14 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("Income: Upper 1/3") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))

#### METRO AREA ####

## Lives in a Metro Area ##
# set working directory


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, METRO == 1)
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

metro1_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(metro1_results, subsample == "eager")
reluctants <- filter(metro1_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p15 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("Metro Area") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))

## Lives in a Metro Area ##
# set working directory


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, METRO == 0)
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

metro0_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(metro0_results, subsample == "eager")
reluctants <- filter(metro0_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p16 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("Non-Metro Area") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))


#### HOUSING TYPE ####

## Lives in a single-family home ##
# set working directory


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, HOME_TYPE3 = case_when(
            between(HOME_TYPE, 1, 2) ~ "Single family",
            between(HOME_TYPE, 3, 3) ~ "Apartment",
            between(HOME_TYPE, 4, 5) ~ "Mobile home or other"))
      d <- filter(d, HOME_TYPE3 == "Single family")
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

house1_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(house1_results, subsample == "eager")
reluctants <- filter(house1_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p17 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("Single-Fam. Home") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))


## Lives in an apartment or other ##
# set working directory


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, HOME_TYPE3 = case_when(
            between(HOME_TYPE, 1, 2) ~ "Single family",
            between(HOME_TYPE, 3, 3) ~ "Apartment",
            between(HOME_TYPE, 4, 5) ~ "Mobile home or other"))
      d <- filter(d, HOME_TYPE3 != "Single family")
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

house1_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(house1_results, subsample == "eager")
reluctants <- filter(house1_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p18 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("Apartment") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))


#### RACE ####
## Whites ##

# set working directory


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, RACETHNICITY == 1)
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

white_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(white_results, subsample == "eager")
reluctants <- filter(white_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p19 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("White") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))

## Non-White ##

# set working directory


# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, RACETHNICITY != 1)
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

nonwhite_results <- all_results[-1, ] #drop first row since it's all NAs

# PLOT DATA #

eagers <- filter(nonwhite_results, subsample == "eager")
reluctants <- filter(nonwhite_results, subsample == "reluctant")

plot_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# creating significance-indicator variable for plot
plot_dat <- mutate(plot_dat, significance = case_when(
      p.value.e < .05 & p.value.r > .05 ~ "only eager",
      p.value.e < .05 & p.value.r < .05 ~ "both",
      p.value.e > .05 & p.value.r < .05 ~ "only reluctant",
      p.value.e > .05 & p.value.r > .05 ~ "neither"))

p20 <- plot_dat %>% filter(!is.na(significance)) %>% 
      ggplot(aes(estimate.e, estimate.r)) + 
      geom_abline(slope = 1, intercept = 0, color = "gray") + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2, color = "gray") + 
      geom_point(size = 1.1) + 
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .15) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .15) + 
      xlab("") + 
      ylab("") +
      coord_cartesian(xlim =c(-1.5, 1.5), ylim = c(-1.5, 1.5)) +
      ggtitle("Nonwhite") +
      scale_color_manual(limits = c("only eager", "both", "only reluctant", "neither"), 
                         values = c("#00BFC4", "#F8766D", "#C77CFF", "#7CAE00")) +
      theme_classic() +
      theme(legend.position="none", 
            text = element_text(size = 14),
            aspect.ratio = 1.05,
            plot.title = element_text(hjust = 0.5, size = 10))



#### ARRANGE INTO A GRID ####
fig.0 <- ggarrange(p1, p2, p3, p4, p5,
                   p6, p7, p8, p9, p10, 
                   p11, p12, p13, p14, p15, 
                   p16, p17, p18, p19, p20,
                   ncol = 5, nrow = 4,
                   legend = "bottom",
                   common.legend = T)
fig.0 <- annotate_figure(fig.0,
                         bottom = text_grob("Standardized Effect among Eager Respondents", color = "black"),
                         left = text_grob("Standardized Effect among Reluctant Respondents", color = "black", rot = 90))
fig.0
ggsave(file = "../../../results/Figure_H3.pdf", 
       width = 8.5, height = 11)



